#---------------------------------------------------------------------------
# Statistical results in main text of "Bypass Aid and Unrest in Autocracies"
# Matthew DiLorenzo
# mdilorenzo@wm.edu
# Last updated 08-21-2017
#---------------------------------------------------------------------------

#------------------------------------
# Load packages and functions
#------------------------------------
## Insert working directory here
 setwd("~/Dropbox/article-manuscripts/baua-isq/baua-isq-final/replication-files/")
## setwd("")

## Load packages
packages <- c("foreign", "MASS", "stargazer", "lmtest", "sandwich", 
              "car", "dplyr", "tikzDevice", "ggplot2", "gridExtra")

lapply(packages, function(x) require(x, character.only = TRUE))



## Load two custom functions: empiricalMarginsNB() and twostageboot()
source("baua-custom-functions.R")


#------------------------------------
# Load and transform data
#------------------------------------
## Load data set
load(file = "baua-isq-replication-data.RData")
names(dat)


## Correcting for scale of variables
dat$bypass_pop_log <- log(
  ((dat$bypass2_t_t1*1000000) / (dat$population_t1*1000)) + 1
  )

dat$public_pop_log <- log(
  ((dat$public_t_t1*1000000) / (dat$population_t1*1000)) + 1
  )

#------------------------------------
# Table 1: Summary Statistics
#------------------------------------
summary_variables <- c("unrest", 
                       "bypass_pop_log", 
                       "public_pop_log", 
                       "pct_bypass2_p_t1", 
                       "civil_conflict_t1", 
                       "nd_count_t1", 
                       "gov_index_t1")

summary_data<- dat[dat$year %in% 2005:2010, summary_variables]

colnames(summary_data) <- c("Unrest Events", 
                            "Bypass Aid/Capita (log)",
                            "Government Aid/Capita (log)", 
                            "Bypass Ratio",
                            "Civil Conflicts", 
                            "Natural Disasters", 
                            "Governance Index")

stargazer(summary_data,
          title = "Summary Statistics for Key Variables",
          label = "summary_table",
          digits = 1,
          notes = c("Note: Data sources described in text."),
          out = "../final-fix/table-1.html")


#------------------------------------
#  Figure 1: Scatterplot 
#------------------------------------

## For PDF version
pdf("../final-fix/figure-1.pdf",
    width = 8.5, height = 4)
par(mfrow = c(1, 2))
with(dat, plot(bypass_pop_log, unrest,
               ylim = c(0, 20),
               main = "Bypass per capita (log)",
               xlab = "Bypass per capita (log)",
               ylab = "Unrest Events",
               pch = 1,
               cex = .5))
with(dat, plot(pct_bypass2_p_t1, unrest,
               ylim = c(0, 20),
               ylab = "",
               xlab = "Bypass Ratio",
               main = "Bypass Ratio",
               pch = 2,
               cex = .5))
dev.off()


# -----------------------------------------------------
#  Table 2: Bypass Aid and Domestic Unrest, 2005-2010
# -----------------------------------------------------
## Model 1
m1 <- glm.nb(unrest ~ bypass_pop_log +
               gov_index_t1 + 
               civil_conflict_t1 + 
               nd_count_t1,
             data = dat, 
             na.action = "na.omit")
summary(m1)
## Model 2
m2 <- update(m1, . ~ pct_bypass2_p_t1 + . - bypass_pop_log)

## Model 3
m3 <- update(m1, . ~ . + factor(cowcode) + factor(year) - 1, maxit = 100)

## Model 4
m4 <- update(m3, . ~ . + public_pop_log)

## Model 5
m5 <- update(m2, . ~ . + factor(cowcode) + factor(year) - 1, maxit = 100)

## Model 6
lpm1 <- lm(unrest_dummy ~ bypass_pop_log + 
             gov_index_t1 + 
             civil_conflict_t1 + 
             nd_count_t1 + 
             public_pop_log +
             factor(cowcode) + 
             factor(year) - 1,
           data = dat, 
           na.action = "na.omit")

## Model 7
lpm2 <- update(lpm1, . ~ pct_bypass2_p_t1 + . - bypass_pop_log - public_pop_log)

## Collect robust standard errors in list for table
models <- list(m1, m2, m3, m4, m5, lpm1, lpm2)
ses <- lapply(models, function(x) sqrt(diag(vcovHC(x, type = "HC1"))))

## Variable names for covariates
covariates <- c("Bypass Aid / Capita",
                "Bypass Ratio",
                "Governance Index",
                "Civil Conflicts",
                "Natural Disasters",
                "Government Aid / Capita")

## Export to table format 
stargazer(m1, m2, m3, m4, m5, lpm1, lpm2,
          se = ses,
          label = "naive-models",
          title = "Bypass Aid and Domestic Unrest, 2005-2010",
          column.labels = c("\\emph{Model 1}", "\\emph{Model 2}", 
                            "\\emph{Model 3}", "\\emph{Model 4}", 
                            "\\emph{Model 5}", "\\emph{Model 6}",
                            "\\emph{Model 7}"),
          dep.var.caption = c("Dependent Variable: Unrest Events"),
          dep.var.labels.include = F,
          covariate.labels = covariates,
          omit = c("cowcode", "year", "Constant"),
          p.auto = TRUE,
          t.auto = TRUE,
          digits = 2,
          column.sep.width = c("0pt"),
          add.lines = list(c("Country-fixed Effects", rep("N", 2) ,rep("Y", 5)),
                           c("Year-fixed Effects", rep("N", 2) ,rep("Y", 5))),
          notes.append = TRUE,
          notes.align = "l",
          notes.label = "",
          font.size = "footnotesize",
          notes = c("Two-tailed tests. (\\possessivecite{White1980} HC1 standard errors)", 
                    "Negative binomial regression (Models 1-5) and OLS (Models 6 and 7). Outcome in Models 6 and 7 is dichotomous indicator of at least one unrest event."),
          float.env = "sidewaystable",
          model.names = FALSE,
          model.numbers = FALSE,
          no.space = TRUE,
          out = "../final-fix/table-2.html")



#-------------------------------------------------------
#  Table 3: Substantive Interpretation for Naive Models
#-------------------------------------------------------

## Get average percentage change in sample with 95% confidence interval
set.seed(484) # From random.org in [0, 1000]
r1 <- empiricalMarginsNB(m1, "bypass_pop_log", "min", "mean", 1000)[2,]
r2 <- empiricalMarginsNB(m2, "pct_bypass2_p_t1", "min", "mean", 1000)[2,]
r3 <- empiricalMarginsNB(m3, "bypass_pop_log", "min", "mean", 1000)[2,]
r4 <- empiricalMarginsNB(m4, "bypass_pop_log", "min", "mean", 1000)[2,]
r5 <- empiricalMarginsNB(m5, "pct_bypass2_p_t1", "min", "mean", 1000)[2,]

## Bind estimates together
substantive_table <- cbind(rep(NA, 5),
                           round(rbind(r1, r2, r3, r4, r5), 2))

## Rename table columns and rows
colnames(substantive_table) <- c(" ",
                                 "Mean % Change", 
                                 "95% lower", 
                                 "95% upper")
rownames(substantive_table) <- c(paste("Model", 1:5, sep = " "))

## Create table
stargazer(substantive_table,
          no.space = TRUE,
          title = "Substantive Effects from Table \\ref{naive-models}",
          label = "change-counts-table",
          notes.append = TRUE,
          notes.align = "l",
          notes.label = "",
          notes = c("Note: Changes from zero to sample means.",
                    "95\\% CIs from 1000 sets of simulated coefficients."),
          digits = 2,
          out = "../final-fix/table-3.html")



mean(substantive_table[,2])


#-----------------------------------------------------------
# Instrumental variables models
#-----------------------------------------------------------
## Model 8, stage 1
mp1 <- lm(bypass_transform ~ aid_scandals_t2_major +
           gov_index_t1 + 
           civil_conflict_t1 + 
           nd_count_t1 + 
           mean_global_growth_lag_2 + 
           mean_global_inflation_lag_2 + 
           total_global_disasters_lag_2, 
         data = dat)

## Get data frame for the updated model with extra variables
dat_iv_1 <- model.frame(update(mp1, . ~ . + unrest + pct_bypass2_p_t1 + year))

## Model 8, stage 2 --- for twostageboot() function
mp2 <- glm.nb(unrest ~ pct_bypass2_p_t1 +
               gov_index_t1 + 
               civil_conflict_t1 + 
               nd_count_t1  + 
               mean_global_growth_lag_2 + 
               mean_global_inflation_lag_2 +
               total_global_disasters_lag_2,
             data = dat_iv_1, 
             na.action = "na.omit")


## Model 9, stage 1
mp1a <- update(mp1, . ~ aid_scandals_t2_extra_continent + . - aid_scandals_t2_major)

dat_iv_2 <- model.frame(update(mp1a, . ~ . + unrest))


## Model 10, stage 1
mp1b <- update(mp1, . ~ scandals_affinity_weighted_past_10 + . + factor(year) - aid_scandals_t2_major - mean_global_growth_lag_2 - mean_global_inflation_lag_2 - total_global_disasters_lag_2)

dat_iv_3 <- model.frame(update(mp1b, . ~ . + unrest + year))

## Model 10, stage 2 --- for twostageboot() function
mp2b <- update(mp2, . ~ . + 
                 factor(year) - mean_global_growth_lag_2 - 
                 mean_global_inflation_lag_2 - total_global_disasters_lag_2)




#-----------------------------------------------------------
# Uncorrected standard errors models
#-----------------------------------------------------------
## Note: The models above are those that get fed into the twostageboot() function
## to get bootstrapped standard errors.  The second-stage models below are those 
## that appear in the paper. The three chunks immediately following this note
## must be run in succession because they each replace the "predicted_ratio" 
## object for ease of creating a table. The models are the same, but the 
## twostageboot() function automatically performs the transformations below.

predicted_ratio <- 100*(1/(1+exp(-predict(update(mp1, data = dat_iv_1)))))
stage_2_naive_1 <- update(mp2, . ~ predicted_ratio + . - pct_bypass2_p_t1)

predicted_ratio <- 100*(1/(1+exp(-predict(update(mp1a, data = dat_iv_2)))))
stage_2_naive_2 <- update(mp2, . ~ predicted_ratio + . - pct_bypass2_p_t1)

predicted_ratio <- 100*(1/(1+exp(-predict(update(mp1b, data = dat_iv_3)))))
stage_2_naive_3 <- update(mp2b, . ~ predicted_ratio + . - pct_bypass2_p_t1)

## Variable names for table
stage_1_names <- c("Major Aid Scandals",
                   "Extra-Continental Scandals",
                   "Affinity-weighted Scandals",
                   "Governance Index",
                   "Civil Conflicts",
                   "Natural Disasters",
                   "Average Global GDP Growth",
                   "Average Global Inflation",
                   "Total Global Disasters")

## Variable names for table
stage_2_names <- c("Predicted Bypass Ratio",
                   "Governance Index",
                   "Civil Conflicts",
                   "Natural Disasters",
                   "Average Global GDP Growth",
                   "Average Global Inflation",
                   "Total Global Disasters")

## Get F-statistics on instrumental variables
f1 <- round(summary(mp1)$coefficients[2, "t value"]^2, 2)
f2 <- round(summary(mp1a)$coefficients[2, "t value"]^2, 2)
f3 <- round(summary(mp1b)$coefficients[2, "t value"]^2, 2)

## Export first stage to table format 
stargazer(mp1, mp1a, mp1b,
          no.space = TRUE,
          label = "stage-1-models",
          title = "Stage I - Aid Scandals and Aid Channel Distribution, 2005-2010",
          column.labels = c("\\emph{(8)}", 
                            "\\emph{(9)}", 
                            "\\emph{(10)}"),
          dep.var.caption = c("Log-Transformed Bypass Ratio"),
          dep.var.labels.include = F,
          model.numbers = FALSE,
          covariate.labels = stage_1_names,
          omit = c("cowcode", "year", "Constant"),
          p.auto = TRUE,
          t.auto = TRUE,
          digits = 2,
          column.sep.width = c("0pt"),
          add.lines = list(c("Year-fixed Effects", rep("N", 2) ,rep("Y", 1)),
                           c("F-Statistic on Instrument", f1, f2, f3)),
          notes.append = TRUE,
          notes.align = "l",
          notes.label = "",
          font.size = "footnotesize",
          notes = c("Two-tailed tests. Estimated standard errors in parentheses.", 
                    "OLS estimates."),
          model.names = FALSE,
          out = "first-stage-iv.html")


## Export second stage to table format 
stargazer(stage_2_naive_1, stage_2_naive_2, stage_2_naive_3,
          no.space = TRUE,
          label = "stage-2-models",
          title = "Stage II - Predicted Bypass Ratio and Unrest, 2005-2010",
          column.labels = c("\\emph{(8)}", 
                            "\\emph{(9)}", 
                            "\\emph{(10)}"),
          dep.var.caption = c("Unrest Events"),
          dep.var.labels.include = F,
          model.numbers = FALSE,
          covariate.labels = stage_2_names,
          omit = c("cowcode", "year", "Constant"),
          p.auto = TRUE,
          t.auto = TRUE,
          digits = 2,
          column.sep.width = c("0pt"),
          add.lines = list(c("Year-fixed Effects", rep("N", 2) ,rep("Y", 1))),
          notes.append = TRUE,
          notes.align = "l",
          notes.label = "",
          font.size = "footnotesize",
          notes = c("Two-tailed tests. Estimated standard errors in parentheses.", 
                    "Negative binomial regression models."),
          model.names = FALSE)



#-----------------------------------------------------------
# Table of bootstrapped coefficients and standard errors
#-----------------------------------------------------------
## Bootstrapped standard errors
set.seed(484)
b1 <- twostageboot(1000, mp1, mp2, "pct_bypass2_p_t1", 
                   base_data = dat_iv_1, log_t = T)

set.seed(484)
b2 <- twostageboot(1000, mp1a, mp2, "pct_bypass2_p_t1", 
                   base_data = dat_iv_2, log_t = T)

set.seed(484)
b3 <- twostageboot(1000, mp1b, mp2b, "pct_bypass2_p_t1", 
                   base_data = dat_iv_3, log_t = T)


## Prepare table
boot_table <- cbind(rep(NA, 3),
                    round(rbind(b1, b2, b3), 3))
colnames(boot_table) <- c(" ","Bypass Coefficient", 
                          "90% lower", "90% upper")
rownames(boot_table) <- c("Major Scandals", "Extra-Continental Scandals", 
                          "Affinity-weighted Scandals")

## Create table
stargazer(boot_table,
          title = "Second-stage Coefficients with Bootstrapped SEs",
          label = "bootstrap-table",
          notes.append = TRUE,
          notes.align = "l",
          notes.label = "",
          notes = c("90\\% CIs from bootstrapped SEs (B = 1000)"),
          digits = 2)

## Prepare quantities for figure
betas <- rev(c(b1[1], b2[1], b3[1]))
y <- c(1, 2, 3)

figure_labels <- c("Model 8: Major Scandals", 
                   "Model 9: \n Extra-Continental \n Scandals", 
                   "Model 10: \n Affinity-weighted \n Scandals")


## PDF version

pdf("baua-table-bootstrap-results-figure.pdf", 
    width = 8.5, height = 5)
par(oma = c(0, 6, 0, 0))
plot(betas, y,
     ylim = c(0, 4),
     xlim = c(-.125, 0),
     main = "Second-stage Coefficients with Bootstrapped SEs",
     xlab = "Estimated Second-Stage Coefficient on Bypass Ratio",
     ylab = "",
     yaxt = "n",
     pch = 18,
     cex = 1.5)

s <- 3
segments(b1[2], y[3], b1[3], y[3], lwd = s)
segments(b2[2], y[2], b2[3], y[2], lwd = s)
segments(b3[2], y[1], b3[3], y[1], lwd = s)
segments(0, 0, 0, 4, lty = "dotted")
axis(2, at = rev(y),
     labels = figure_labels,
     las = 1,
     cex = .15)
text(-0.095, 3.5,
     "Note: 90% CI, bootstrapped coefficients (B = 1000) \n Negative binomial regression models.",
     cex = .75)
dev.off()


